# load the raw data stored in the ./Data folder


# Company-level reference information, keyed (further down) on Security and
# Broad.Company.Name when it is joined to the quarterly data.
company_info <- read.csv(
  file = "Data/Company info.csv",
  stringsAsFactors = FALSE
)

# Quarterly security-level data.
#   1. parse the dd/mm/yyyy date strings into Date objects
#   2. attach the company information (merge() keeps only rows whose
#      Security / Broad.Company.Name pair appears in both tables)
#   3. sort by date, then company name
#   4. rank securities by market capitalisation within each date
#      (rank 1 = largest; missing capitalisations are ranked last)
SSE_data <-
  read.csv(
    file = "Data/Sydney Stock Exchange Data - Quarterly.csv",
    stringsAsFactors = FALSE
  ) %>%
  mutate(Dates = as.Date(Dates, format = "%d/%m/%Y")) %>%
  merge(company_info, by = c("Security", "Broad.Company.Name")) %>%
  arrange(Dates, Broad.Company.Name) %>%
  group_by(Dates) %>%
  mutate(
    Market.Capitalisation.Rank = rank(-Market.Capitalisation, na.last = TRUE)
  ) %>%
  ungroup() %>%
  as_tibble()

# Keep only the "Ordinary" and "(A)" share types, i.e. exclude B and C class
# shares.
SSE_data_ordinaries <-
  SSE_data %>%
  filter(Share.type %in% c("Ordinary", "(A)"))

# Annual company-level data
# NOTE(review): as.Date() is called without a format, so the Year column is
# presumably stored as an unambiguous date string (e.g. yyyy-mm-dd) - confirm
# against the CSV.
SSE_annual <-
  read.csv("./Data/Sydney Stock Exchange Data - Annual.csv",
           stringsAsFactors = FALSE) %>% # FALSE, not F: F can be reassigned
  mutate(Year = as.Date(Year)) %>%
  as_tibble()


# Other time series (those calculated externally). Both frequencies live in
# the same workbook, one sheet each; the Dates column is converted to Date.

external_series_annual <-
  read_excel("Data/Other time series.xlsx", sheet = "Annual") %>%
  mutate(Dates = as.Date(Dates))

external_series_quarterly <-
  read_excel("Data/Other time series.xlsx", sheet = "Quarterly") %>%
  mutate(Dates = as.Date(Dates))

# modern company-level data

# Single definition of the workbook path. The file name was previously spelled
# with two different capitalisations ("company" / "Company"), which only works
# on case-insensitive file systems.
# NOTE(review): confirm this spelling matches the file name on disk.
modern_company_data_path <- "Data/Modern company-level data.xlsx"

# Sheets to read from the workbook, and the name given to the value column
# built from each sheet (matched by position).
sheet_list <- c("Cap", "Shares", "Price")
name_list <- c("Market.Capitalisation", "Shares", "Price")

# Build one long table with a row per date and ASX code:
#   * each sheet is reshaped from wide (one column per ASX code) to long and
#     the per-sheet tables are merged together on their shared columns
#   * sector information is attached, keeping unmatched rows from both sides
#   * the ASX 200 membership list is attached and only rows flagged
#     InASX200 are kept
#   * a market capitalisation rank is computed within each date
#     (rank 1 = largest; missing capitalisations are ranked last)
#   * Broad.sector is derived from GICS_code: codes starting "10" or equal to
#     "151040" are "Resources", codes starting "40" are "Financial", anything
#     else is "Other"
# The result is left grouped by Dates (there is no ungroup() at the end).
external_companies <-
  Reduce(merge, lapply(seq_along(sheet_list), function(x) {
    read_excel(modern_company_data_path, sheet = sheet_list[x]) %>%
      mutate(Dates = as.Date(Dates)) %>%
      # columns read in as text are coerced to numeric
      mutate_if(is.character, as.numeric) %>%
      gather(ASX_code, !!name_list[x], -Dates) %>%
      # move each date to the day before the next month boundary
      mutate(Dates = ceiling_date(Dates, unit = "month") - 1)
  })) %>%
  merge(read_excel(modern_company_data_path, sheet = "Sector"),
        all = TRUE) %>%
  merge(
    read.csv("Data/ASX 200 list.csv", stringsAsFactors = FALSE) %>%
      mutate(Dates = as.Date(Dates, format = "%d/%m/%Y"))
  ) %>%
  filter(InASX200) %>%
  group_by(Dates) %>%
  mutate(
    Market.Capitalisation.Rank = rank(-Market.Capitalisation, na.last = TRUE),
    Broad.sector = ifelse((substr(GICS_code, 1, 2) == "10") |
                            (GICS_code == "151040"),
                          "Resources",
                          ifelse(substr(GICS_code, 1, 2) == "40", "Financial", "Other")
    )
  )
